'''
http://www.orangepi.cn
water
2017-12-01
Download Orange Pi board details
'''

import os
import codecs
import requests

# Local directory under which every file written by this script is placed.
root = "D:/Pi"
# Base URL of the site; page and link paths are appended to it.
home = "http://www.orangepi.cn"
# Path appended to `home` to form the index page URL (empty: the site root).
default = ""

def find_str_between(l, s_s, s_e):
    """Return the part of ``l`` enclosed by the markers ``s_s`` and ``s_e``.

    The first occurrence of ``s_s`` is used, followed by the first
    occurrence of ``s_e`` after it.

    Args:
        l: Text to search.
        s_s: Start marker; the result begins right after it.
        s_e: End marker; the result stops right before it. An empty string
            means "everything up to the end of ``l``".

    Returns:
        The enclosed text, or ``""`` when either marker cannot be found.
    """
    i_s = l.find(s_s)
    if i_s == -1:
        return ""
    s = l[i_s + len(s_s):]
    if s_e == "":
        return s
    i_e = s.find(s_e)
    if i_e == -1:
        # A missing end marker must not fall through to s[:-1], which would
        # silently drop the last character of the remainder.
        return ""
    return s[:i_e]

def html2txt(s):
    """Return ``s`` with every ``<...>`` tag removed.

    The text is scanned once from left to right. A ``<`` that has no
    closing ``>`` after it is kept as ordinary text, so malformed input
    cannot cause an endless loop.

    Args:
        s: Text that may contain HTML tags.

    Returns:
        The text with all complete tags (including empty ``<>``) stripped.
    """
    parts = []
    pos = 0
    while True:
        lt = s.find('<', pos)
        if lt == -1:
            break
        gt = s.find('>', lt + 1)
        if gt == -1:
            # Unterminated '<': nothing more to strip, keep the rest as-is.
            break
        parts.append(s[pos:lt])
        pos = gt + 1
    parts.append(s[pos:])
    return ''.join(parts)

def down_img(href, file):
    """Download ``href`` to the local path ``file`` unless it already exists.

    The parent directory of ``file`` is created when missing. Download and
    write failures are reported on stdout and otherwise ignored, so one bad
    image does not abort the caller.

    Args:
        href: URL of the resource to fetch.
        file: Destination path on disk.
    """
    s_p = os.path.dirname(file)
    # A bare file name has no directory part; os.makedirs('') would raise.
    if s_p and not os.path.exists(s_p):
        os.makedirs(s_p)

    if os.path.exists(file):
        return

    try:
        # The timeout keeps a stalled server from blocking the run forever.
        r = requests.get(href, timeout=30)
        # Do not store an HTTP error page as if it were the image.
        r.raise_for_status()
        with open(file, "wb") as f:
            f.write(r.content)
    except (requests.RequestException, OSError):
        print("\tError: %s" % file)

def getpage(url):
    """Return the text of ``url``, using an on-disk cache under ``root``.

    The cache file lives in ``<root>/temp`` and is named after the URL with
    ``/`` replaced by ``-`` and ``:`` replaced by ``_``. When the cache file
    exists it is read and returned without any network access; otherwise
    the page is fetched, decoded as UTF-8, stored and returned.

    Args:
        url: Address of the page to load.

    Returns:
        The page text, or ``""`` when fetching, decoding or caching failed
        (an error line is printed in that case).
    """
    fn = url.replace("/", "-").replace(":", "_")
    path = "%s/temp" % root
    if not os.path.exists(path):
        os.makedirs(path)
    fn = "%s/%s" % (path, fn)

    if os.path.exists(fn):
        with codecs.open(fn, "r", "utf-8") as f:
            return f.read()

    try:
        # The timeout keeps a stalled server from blocking the run forever.
        r = requests.get(url, timeout=30)
        # An HTTP error page must not be cached and later served as content.
        r.raise_for_status()
        s = r.content.decode("utf-8")
        with codecs.open(fn, "w", "utf-8") as f:
            f.write(s)
    except (requests.RequestException, UnicodeDecodeError, OSError):
        print("\terror! %s" % url)
        return ""
    return s

def downdetail(url):
    """Download one detail page and save its notes, images and spec table.

    The page is loaded through ``getpage`` and parsed line by line:

    * Lines after the ``<div id="shangyiceng" ...>`` marker and before the
      ``<div class="main">`` marker are collected as notes. Every ``<img``
      found there is downloaded into the output directory as
      ``opi_<file name>``.
    * Lines after ``<div class="main">`` and before ``</table>`` are read
      as table rows: ``<td `` cells are taken in pairs (name, value) and
      rows containing ``colspan="3"`` are skipped.
    * When a line containing ``</html>`` is reached, the notes (with tags
      stripped) are written to ``notes.txt`` and the table to
      ``detail.txt`` in the output directory.

    Args:
        url: Address of the detail page.
    """
    # Image paths are appended to the URL with its last two components removed.
    webpath2 = os.path.dirname(os.path.dirname(url))
    t1 = url.replace(home + '/', "").split('/')[0]
    if t1 == '':
        t1 = 'orangepiplus'
        webpath2 = home
    # Drops the first eight characters -- presumably the "orangepi" prefix
    # of the site's directory names; verify against the live URLs.
    t1 = 'OPI-' + t1[8:]
    path = "%s/%s" % (root, t1)
    if not os.path.exists(path):
        os.makedirs(path)

    s = getpage(url)
    if s == "":
        return
    # Normalise the markup so each paragraph / table cell sits on one line.
    s = s.replace('\r', '')
    s = s.replace('\t', '')
    s = s.replace('<p >\n', '<p >')
    s = s.replace('&nbsp;', ' ')
    s = s.replace('&#215;', '×')
    s = s.replace('</br>\n', '</br>')
    s = s.replace('</br></br>', '</br>')

    # Bound up front so a page without a '<head>' line cannot trigger an
    # UnboundLocalError further down.
    s_txt = ""
    s_inf = ""
    s_c1 = ""
    b_notes = False
    b_table = False
    i_c = 0
    # NOTE: the order of the checks below is significant -- a marker line
    # itself is never treated as content of the section it opens or closes.
    for l in s.split('\n'):
        if '<head>' in l:
            s_txt = ""
            s_inf = ""
        if '<div class="main">' in l:
            b_notes = False
        if b_notes:
            s_txt += l + '\n'
            if '<img ' in l:
                src = webpath2 + find_str_between(l, 'src="', '"')
                s_file = '%s/opi_%s' % (path, os.path.basename(src))
                down_img(src, s_file)
        if '<div id="shangyiceng" style=" clear:both">' in l:
            b_notes = True
        if '</table>' in l:
            b_table = False
        if b_table:
            l = l.replace('</br>', '; ')
            if 'colspan="3"' not in l and '<td ' in l:
                if i_c == 0:
                    # First cell of a pair: the property name.
                    s_c1 = find_str_between(l, '<p >', '<')
                    i_c += 1
                else:
                    # Second cell of a pair: the property value.
                    s_c2 = find_str_between(l, '<p >', '<')
                    i_c = 0
                    s_inf += "%-30s %s\n" % (s_c1, s_c2)
                    print("\t%-30s %s" % (s_c1, s_c2))
        if '<div class="main">' in l:
            b_table = True
            i_c = 0
        if '</html>' in l:
            s_txt = html2txt(s_txt)
            with codecs.open(path + '/notes.txt', 'w', 'utf-8') as f:
                f.write(s_txt)

            with codecs.open(path + '/detail.txt', 'w', 'utf-8') as f:
                f.write(s_inf)
def down():
    """Fetch the index page and download the details of every listed board.

    Each index line containing ``<li><a href="`` whose ``<span>`` title
    starts with ``O`` is recorded as a "title  link" row and handed to
    ``downdetail``. The collected rows are written to
    ``<root>/index_opi.txt``. Nothing is written when the index page cannot
    be loaded.
    """
    if not os.path.exists(root):
        os.makedirs(root)

    page = getpage(home + default)
    if page == "":
        return

    rows = []
    for line in page.split('\n'):
        if '<li><a href="' not in line:
            continue
        title = find_str_between(line, '<span>', '<')
        link = home + find_str_between(line, 'href="', '"')
        if not title.startswith('O'):
            continue
        rows.append("%-25s %s\n" % (title, link))
        downdetail(link)

    with codecs.open(root + '/index_opi.txt', 'w', 'utf-8') as out:
        out.write(''.join(rows))

# Start the full download only when run as a script, not when imported.
if __name__ == "__main__":
    down()